################################################################################ 
#
# Replication code for:
# Empathy-based counterspeech can reduce racist hate speech
# in a social media field experiment
#
# PNAS 2021
# 
# 
# Additional Unpublished Analysis
# --------------------------------
# 
# For each user in the sample, we retrospectively retrieve network information.
# In particular, we look at each user's followers and friends who are also
# included in the study sample. This information is recorded in the dataset
# "followers_data.csv". We then repeat our main analysis, excluding users who
# were found to have links to other subjects. We show that our results are
# robust to eliminating such interference.
# 
################################################################################ 



# Start from an empty workspace so that objects left over from an earlier
# session cannot leak into this analysis.
rm(list = ls())

# Flag that is set here but not read anywhere else in this script.
# NOTE(review): presumably toggles saving of outputs -- confirm before removing.
# Spelled out as FALSE because `F` is an ordinary variable that can be reassigned.
setsave <- FALSE

################################################################################ 
#  LIBRARIES
################################################################################ 

library(stargazer)
library(dplyr)
library(tidyr)
library(readr)
library(ggplot2)
library(estimatr)
library(xtable)
library(ggpubr)
library(lmtest)
library(sandwich) 

################################################################################ 
#   DATA AND FOLDER
################################################################################ 

# Locate this script on disk. Inside RStudio the path of the active document is
# used; outside RStudio (e.g. `Rscript this_file.R`) that call errors, so we
# fall back to the `--file=` argument that Rscript passes to R.
script_path <- tryCatch(
  rstudioapi::getActiveDocumentContext()$path,
  error = function(e) ""
)
if (length(script_path) != 1L || is.na(script_path) || !nzchar(script_path)) {
  file_arg <- grep("^--file=", commandArgs(trailingOnly = FALSE), value = TRUE)
  script_path <- if (length(file_arg) > 0) {
    normalizePath(sub("^--file=", "", file_arg[1]))
  } else {
    ""
  }
}
if (!nzchar(script_path)) {
  # Without this check an empty path would silently resolve to "/data".
  stop(
    "Cannot determine the location of this script. ",
    "Open the saved file in RStudio or run it with Rscript.",
    call. = FALSE
  )
}

# The project root is three directory levels above the script file.
wd <- dirname(dirname(dirname(script_path)))
wd_res <- file.path(wd, "results")
wd_data <- file.path(wd, "data")

################################################################################ 
################################################################################


# Relative paths below are resolved against the data folder.
setwd(wd_data)

data <- read.csv("main_dataset.csv")

# LASSO control variables: drop the first column, then standardise every
# remaining column with scale().
lassodata <- read_rds("data_lasso2.RDS")
lassodata <- lassodata[-1]
lassodata <- as.data.frame(lapply(lassodata, scale))

# Keep only rows whose `deleted_suspended` entry is missing.
data <- data[is.na(data$deleted_suspended), ]

# The controls are attached purely by row position, so the two tables must
# have the same number of rows; fail fast with a clear message if they do not.
stopifnot(nrow(lassodata) == nrow(data))
data <- cbind(data, lassodata)


# Network information; rows with a missing value in either count are dropped.
# NOTE(review): users dropped here never enter `follow` below, so they remain
# in the analysis sample -- confirm that this is intended.
network <- read.csv("auxiliary/followers_data.csv")
network <- network[!is.na(network$followed_by_itt) &
                     !is.na(network$friends_in_itt), ]

# Summary: share of users with a value of zero in each network variable.
# Printed explicitly so the figures also appear when the script is source()d.
print(c(
  followed_by_itt = mean(network$followed_by_itt == 0),
  friends_in_itt = mean(network$friends_in_itt == 0)
))

# Extract the user lists.
#   nofollow: zero in BOTH variables (the complement of `follow`).
#   follow:   a positive value in at least one of the two variables.
nofollow <- network$exp_id[network$followed_by_itt == 0 &
                             network$friends_in_itt == 0]
follow <- network$exp_id[network$followed_by_itt > 0 |
                           network$friends_in_itt > 0]

# Keep only users that are not in the `follow` list.
data <- data[!(data$exp_id %in% follow), ]

# Replace hyphens in column names with dots so the names can be used in
# model formulas.
names(data) <- gsub("-", ".", names(data), fixed = TRUE)

# Upload the controls list (one row per model specification).
container <- read.csv(
  file.path(wd_data, "auxiliary", "container_lasso_controls.csv")
)

# Estimate one OLS model per row of the specification table `container`.
# For every row the loop reads, by column position:
#   column 1  - name of the outcome variable,
#   column 4  - name of the treatment indicator,
#   column 11 - comma-separated list of control variables.
# Results are stored as coeftest tables with HC0 robust standard errors.

n_models <- 18
stopifnot(nrow(container) >= n_models)

# Value of `treatment_main` that belongs to each treatment indicator.
# Any other label maps to 0, i.e. only control observations are kept.
treatment_codes <- c(meme_dummy = 1, consequences_dummy = 2, empathy_dummy = 3)

# Preallocate the result holders instead of growing them inside the loop.
container_empathy <- vector("list", n_models)
yvars <- character(n_models)
formulas <- character(n_models)
tvars <- numeric(n_models)

for (line in seq_len(n_models)) {

  treat_label <- as.character(container[line, 4])
  treat_code <- unname(treatment_codes[treat_label])
  if (is.na(treat_code)) {
    treat_code <- 0
  }

  # Control group (0) plus the treatment arm of this specification.
  arm <- data[data$treatment_main %in% c(0, treat_code), ]

  outcome <- as.character(container[line, 1])

  # Control names are stored with a leading "x" that is stripped here.
  # Missing or empty entries are dropped so the formula never ends in "+".
  controls <- unlist(strsplit(as.character(container[line, 11]), ", ",
                              fixed = TRUE))
  controls <- sub("^x", "", controls)
  controls <- controls[!is.na(controls) & nzchar(controls)]

  model_formula <- paste0(
    "scale(", outcome, ") ~ ",
    paste(c("treatment_dummy", controls), collapse = " + ")
  )

  # Drop rows whose outcome is NA, NaN or infinite (either sign); lm() cannot
  # handle such values in the response.
  arm <- arm[is.finite(arm[[outcome]]), ]

  formulas[line] <- model_formula
  yvars[line] <- outcome
  tvars[line] <- treat_code

  fit <- lm(as.formula(model_formula), data = arm)
  container_empathy[[line]] <- coeftest(fit, vcov = vcovHC(fit, type = "HC0"))
}




MOD <- container_empathy

# Pull one cell out of the second row of every coeftest table. The second row
# is the first regressor after the intercept (treatment_dummy in the formulas
# built above); columns 1, 2 and 4 are the estimate, its standard error and
# the p-value.
extract_cell <- function(column) {
  vapply(MOD, function(tab) tab[2, column], numeric(1))
}

container <- cbind.data.frame(
  yvar = yvars,
  treat = tvars,
  coef = extract_cell(1),
  se = extract_cell(2),
  pval = extract_cell(4)
)

# Give final names to vars. Outcomes or treatments that are not listed in a
# lookup table receive NA.
yvar_labels <- c(
  num_hate_tweets_post = "# Xenophobic\nTweets",
  num_tweets_post = "# Total\nTweets",
  post_hate_share_tot = "Xenophobic\nTweet Share",
  outcome_deleted_specific = "Xenophobic\nTweet Deleted",
  tox_vader_neu_post = "Vader\nNegativity",
  deletion_rate = "Tweet\nDeletion Rate"
)
category_labels <- c(
  num_hate_tweets_post = "Hate Speech\nCreation",
  num_tweets_post = "Hate Speech\nCreation",
  post_hate_share_tot = "Hate Speech\nCreation",
  outcome_deleted_specific = "Hate Speech\nDeletion",
  deletion_rate = "Hate Speech\nDeletion",
  tox_vader_neu_post = "Tone"
)
treatment_labels <- c(
  "1" = "Humor",
  "2" = "Warning of Consequences",
  "3" = "Empathy"
)

# as.character() guards against factor columns, which would otherwise index
# the lookup tables by integer code instead of by name.
outcome_key <- as.character(container$yvar)
container$yvar_name <- unname(yvar_labels[outcome_key])
container$categories <- unname(category_labels[outcome_key])
container$treatments_lab <- unname(treatment_labels[as.character(container$treat)])

# Create factors for better plots: levels follow the order of first appearance
# so that the plots keep the order of the specification table.
container$yvar_name.f <- factor(container$yvar_name,
                                levels = unique(container$yvar_name))
container$treatments_lab.f <- factor(container$treatments_lab,
                                     levels = unique(container$treatments_lab))
container$categories.f <- factor(container$categories,
                                 levels = unique(container$categories))


# Coefficient plot for one or more outcome categories.
#
# Arguments:
#   category   Character vector; rows whose `categories` value is in this
#              vector are plotted.
#   title      Plot title.
#   plot_data  Data frame with the columns `categories`, `coef`, `se`,
#              `yvar_name.f` and `treatments_lab.f`. Defaults to the global
#              `container`, which is what the function used to read directly.
#
# Returns a ggplot object: one row per outcome, one point per treatment, with
# a thin bar for coef +/- 1.96 * se and a thick bar for coef +/- 1.645 * se.
makeplot <- function(category, title, plot_data = container) {
  shown <- plot_data[plot_data$categories %in% category, ]

  # One shared dodge so points and both error bars of a treatment line up.
  dodge <- position_dodge(width = 1/2)

  ggplot(shown,
         aes(x = coef, y = yvar_name.f,
             colour = treatments_lab.f, shape = treatments_lab.f,
             label = round(coef, 2))) +
    # NOTE(review): with one row per outcome/treatment pair each group holds a
    # single observation, so this layer appears to draw nothing -- confirm
    # before removing it.
    geom_line() +
    geom_point(position = dodge, size = 5) +
    geom_errorbar(aes(xmin = coef - se * 1.96, xmax = coef + se * 1.96),
                  width = 0, position = dodge, size = 1) +
    geom_errorbar(aes(xmin = coef - se * 1.645, xmax = coef + se * 1.645),
                  width = 0, position = dodge, size = 2) +
    geom_vline(xintercept = 0, color = "black", linetype = "dashed") +
    xlab("") + ylab("") +
    theme_light() +
    # Reverse the axis so the first outcome appears at the top.
    scale_y_discrete(limits = rev) +
    ggtitle(title) +
    theme(text = element_text(size = 28),
          plot.title = element_text(size = 30),
          axis.text = element_text(color = "black"),
          legend.position = "bottom",
          legend.box = "horizontal",
          legend.title = element_blank())
}

# Build the two panels and place them side by side with one shared legend.
gg1 <- makeplot("Hate Speech\nCreation", "Hate Speech Creation")
gg2 <- makeplot(c("Hate Speech\nDeletion", "Tone"),
                "Hate Speech Deletion and Overall Tone")

# Printed explicitly: top-level values are not auto-printed when the script is
# run through source(), so without print() no figure would be drawn there.
print(ggarrange(gg1, gg2, widths = c(1, 1), common.legend = TRUE,
                legend = "bottom"))



